#include <cstdlib>

unsigned char mask[]={0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
#define tget(i) ( (t[(i)/8]&mask[(i)%8]) ? 1 : 0 )
#define tset(i, b) t[(i)/8]=(b) ? (mask[(i)%8]|t[(i)/8]) : ((~mask[(i)%8])&t[(i)/8])
#define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i])
#define isLMS(i) (i>0 && tget(i) && !tget(i-1))
// find the start or end of each bucket
void getBuckets(unsigned char *s, int *bkt, int n, int K, int cs, bool end) {
int i, sum=0;
for(i=0; i<=K; i++) bkt[i]=0; // clear all buckets
for(i=0; i<n; i++) bkt[chr(i)]++; // compute the size of each bucket
for(i=0; i<=K; i++) { sum+=bkt[i]; bkt[i]=end ? sum : sum-bkt[i]; }
}
// compute SAl
void induceSAl(unsigned char *t, int *SA, unsigned char *s, int *bkt,
int n, int K, int cs, bool end) {
int i, j;
getBuckets(s, bkt, n, K, cs, end); // find starts of buckets
for(i=0; i<n; i++) {
j=SA[i]-1;
if(j>=0 && !tget(j)) SA[bkt[chr(j)]++]=j;
}
}
// compute SAs
void induceSAs(unsigned char *t, int *SA, unsigned char *s, int *bkt,
int n, int K, int cs, bool end) {
int i, j;
getBuckets(s, bkt, n, K, cs, end); // find ends of buckets
for(i=n-1; i>=0; i--) {
j=SA[i]-1;
if(j>=0 && tget(j)) SA[--bkt[chr(j)]]=j;
}
}

// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n-1]=0 (the sentinel!), n>=2
// use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet
void SA_IS(unsigned char *s, int *SA, int n, int K, int cs) {
int i, j;
unsigned char *t=(unsigned char *)malloc(n/8+1); // LS-type array in bits
// Classify the type of each character
tset(n-2, 0); tset(n-1, 1); // the sentinel must be in s1, important!!!
for(i=n-3; i>=0; i--)
tset(i, (chr(i)<chr(i+1) || (chr(i)==chr(i+1) && tget(i+1)==1))?1:0);
// stage 1: reduce the problem by at least 1/2
// sort all the S-substrings
int *bkt = (int *)malloc(sizeof(int)*(K+1)); // bucket array
getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
for(i=0; i<n; i++) SA[i]=-1;
for(i=1; i<n; i++)
if(isLMS(i)) SA[--bkt[chr(i)]]=i;
induceSAl(t, SA, s, bkt, n, K, cs, false);
induceSAs(t, SA, s, bkt, n, K, cs, true);
free(bkt);
// compact all the sorted substrings into the first n1 items of SA
// 2*n1 must be not larger than n (proveable)
int n1=0;
for(i=0; i<n; i++)
if(isLMS(SA[i])) SA[n1++]=SA[i];
// find the lexicographic names of all substrings
for(i=n1; i<n; i++) SA[i]=-1; // init the name array buffer
int name=0, prev=-1;
for(i=0; i<n1; i++) {
int pos=SA[i]; bool diff=false;
for(int d=0; d<n; d++)
if(prev==-1 || chr(pos+d)!=chr(prev+d) || tget(pos+d)!=tget(prev+d))
{ diff=true; break; }
else if(d>0 && (isLMS(pos+d) || isLMS(prev+d))) break;
if(diff) { name++; prev=pos; }
pos=(pos%2==0)?pos/2:(pos-1)/2;
SA[n1+pos]=name-1;
}
for(i=n-1, j=n-1; i>=n1; i--)
if(SA[i]>=0) SA[j--]=SA[i];

// stage 2: solve the reduced problem
// recurse if names are not yet unique
int *SA1=SA, *s1=SA+n-n1;
if(name<n1)
SA_IS((unsigned char*)s1, SA1, n1, name-1, sizeof(int));
else // generate the suffix array of s1 directly
for(i=0; i<n1; i++) SA1[s1[i]] = i;
// stage 3: induce the result for the original problem
bkt = (int *)malloc(sizeof(int)*(K+1)); // bucket array
// put all left-most S characters into their buckets
getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
for(i=1, j=0; i<n; i++)
if(isLMS(i)) s1[j++]=i; // get p1
for(i=0; i<n1; i++) SA1[i]=s1[SA1[i]]; // get index in s
for(i=n1; i<n; i++) SA[i]=-1; // init SA[n1..n-1]
for(i=n1-1; i>=0; i--) {
j=SA[i]; SA[i]=-1;
SA[--bkt[chr(j)]]=j;
}
induceSAl(t, SA, s, bkt, n, K, cs, false);
induceSAs(t, SA, s, bkt, n, K, cs, true);
free(bkt); free(t);
}

/*******************************************************************/
/*******************************************************************/
/*******************************************************************/

inline bool leq(int a1, int a2,   int b1, int b2) { // lexic. order for pairs
  return(a1 < b1 || a1 == b1 && a2 <= b2); 
}                                                   // and triples
inline bool leq(int a1, int a2, int a3,   int b1, int b2, int b3) {
  return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3)); 
}
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
static void radixPass(int* a, int* b, int* r, int n, int K) 
{ // count occurrences
  int* c = new int[K + 1];                          // counter array
  for (int i = 0;  i <= K;  i++) c[i] = 0;         // reset counters
  for (int i = 0;  i < n;  i++) c[r[a[i]]]++;    // count occurences
  for (int i = 0, sum = 0;  i <= K;  i++) { // exclusive prefix sums
     int t = c[i];  c[i] = sum;  sum += t;
  }
  for (int i = 0;  i < n;  i++) b[c[r[a[i]]]++] = a[i];      // sort
  delete [] c;
}

// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n]=s[n+1]=s[n+2]=0, n>=2
void suffixArray(int* s, int* SA, int n, int K) {
  int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2; 
  int* s12  = new int[n02 + 3];  s12[n02]= s12[n02+1]= s12[n02+2]=0; 
  int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
  int* s0   = new int[n0];
  int* SA0  = new int[n0];
 
  // generate positions of mod 1 and mod  2 suffixes
  // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
  for (int i=0, j=0;  i < n+(n0-n1);  i++) if (i%3 != 0) s12[j++] = i;

  // lsb radix sort the mod 1 and mod 2 triples
  radixPass(s12 , SA12, s+2, n02, K);
  radixPass(SA12, s12 , s+1, n02, K);  
  radixPass(s12 , SA12, s  , n02, K);

  // find lexicographic names of triples
  int name = 0, c0 = -1, c1 = -1, c2 = -1;
  for (int i = 0;  i < n02;  i++) {
    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) { 
      name++;  c0 = s[SA12[i]];  c1 = s[SA12[i]+1];  c2 = s[SA12[i]+2];
    }
    if (SA12[i] % 3 == 1) { s12[SA12[i]/3]      = name; } // left half
    else                  { s12[SA12[i]/3 + n0] = name; } // right half
  }

  // recurse if names are not yet unique
  if (name < n02) {
    suffixArray(s12, SA12, n02, name);
    // store unique names in s12 using the suffix array 
    for (int i = 0;  i < n02;  i++) s12[SA12[i]] = i + 1;
  } else // generate the suffix array of s12 directly
    for (int i = 0;  i < n02;  i++) SA12[s12[i] - 1] = i; 

  // stably sort the mod 0 suffixes from SA12 by their first character
  for (int i=0, j=0;  i < n02;  i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
  radixPass(s0, SA0, s, n0, K);

  // merge sorted SA0 suffixes and sorted SA12 suffixes
  for (int p=0,  t=n0-n1,  k=0;  k < n;  k++) {
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
    int i = GetI(); // pos of current offset 12 suffix
    int j = SA0[p]; // pos of current offset 0  suffix
    if (SA12[t] < n0 ? 
        leq(s[i],       s12[SA12[t] + n0], s[j],       s12[j/3]) :
        leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
    { // suffix from SA12 is smaller
      SA[k] = i;  t++;
      if (t == n02) { // done --- only SA0 suffixes left
        for (k++;  p < n0;  p++, k++) SA[k] = SA0[p];
      }
    } else { 
      SA[k] = j;  p++; 
      if (p == n0)  { // done --- only SA12 suffixes left
        for (k++;  t < n02;  t++, k++) SA[k] = GetI(); 
      }
    }  
  } 
  delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; 
}